/****************************************************************************************************
* Paper:            Explaining the declining utilization of village clinics in rural China over time:
                    A Decomposition Approach

* Goal:             Summary Statistics, Main Regressions, Decomposition

* Created by:       Yunwei Chen
* Last Modified:    2022-04-19

* Input Data:       Decomposition_household.dta                 // Household dataset
                    Decomposition_village.dta                   // Village dataset
                    Decomposition_clinic.dta                    // Village clinic dataset
                    Decomposition_doctor.dta                    // Village doctor dataset
********************************************************************************************/


* Set up Directories
if c(username) == "yunweichen" {
cd "/Users/yunweichen/Desktop/Decomposition_Output"
global output "/Users/yunweichen/Desktop/Decomposition_Output"
global datadir "/Users/yunweichen/Documents/GitHub/Paper-Bypassing/DataSharing"
}

*********************************************************************
*          Variable Definition and Determination of Analysis     
*********************************************************************

use "$datadir/Decomposition_household.dta", clear              // 26592

	* Sample Constraint

	drop if MIGRANT==1                    // drop migrants (5320)
	drop if AGE<18                        // drop children (4519)
	
	table1, by(YEAR) vars(SICK bine \ LSEEKMS cat \ LFACILITY1 cat) cformat(%12.0f)

	keep if SICK==1                       // keep sick people (drop 5921)
	drop if LFACILITY1==6                 // drop unidentified facilities (drop 328/10832 (3.0%))
	drop if LFACILITY1==2                 // drop neighboring village clinic (drop 400/10832 (3.7%))

	* Define Facility Choice

	gen CHOICE=.
	replace CHOICE=1 if LSEEKMS==3|LSEEKMS==2
	replace CHOICE=2 if LFACILITY1==1
	replace CHOICE=3 if LFACILITY1==3
	replace CHOICE=4 if LFACILITY1==4
	replace CHOICE=5 if LFACILITY1==5
	labe define CHOICE 1 "[1] Self-Treatment" 2 "[2] VC" 3 "[3] THC" ///
		4 "[4] CH"  5 "[5] Outside County"
	label value CHOICE CHOICE
	tab CHOICE

	table1, by(YEAR) vars(CHOICE cat) cformat(%12.0f)            // 10104


	* Dependent variable

		* VC User
		gen VCUSER=(CHOICE==2)          
		tab VCUSER YEAR

		* Two Parts
		gen DOCVIST=(LSEEKMS==1)
		tab DOCVIST YEAR

		gen SELFTREAT=(DOCVIST==0)

		gen VCUSER_CON=(CHOICE==2) if LSEEKMS==1
		replace VCUSER_CON=. if LSEEKMS!=1
		tab VCUSER_CON YEAR
	
	* Health Need Variables

		* Disease Type
		tab LDISEASE, m     

		* Self-reported health
		tab HEALTH, m

	* Predisposing Variables

		* Age
		sum AGE     

			gen AGE_CAT = .
			replace AGE_CAT = 1 if AGE>=18 & AGE<35
			replace AGE_CAT = 2 if AGE>=35 & AGE<50
			replace AGE_CAT = 3 if AGE>=50 & AGE<65
			replace AGE_CAT = 4 if AGE>=65 & AGE<.
			label define AGECATL 1 "[1] 18-34 years" 2 "[2] 35-49 years" ///
				3 "[3] 50-64 years" 4 "[4] >=65 years"
			label values AGE_CAT AGECATL
			label variable AGE_CAT "Age category"
			tab AGE_CAT, m 

		* Gender
		tab GENDER, m     
		gen GENDER_MALE=(GENDER==1)
		replace GENDER_MALE=. if GENDER==.
		label variable GENDER_MALE "Male (0/1)"
		tab GENDER_MALE, m

		* Education
		tab EDU_CAT, m 

	* Enabling Variables

		* NCMS Insurance
		tab NCMS, m   
		tab NCMS_VC, m

		gen NCMS_REIM=.
		replace NCMS_REIM=1 if NCMS==0
		replace NCMS_REIM=2 if NCMS==1&(NCMS_VC==0|NCMS_VC==.)        // Missing means no village clinics
		replace NCMS_REIM=3 if NCMS==1&NCMS_VC==1
		tab NCMS NCMS_REIM, m

			label define NCMS_REIM 1 "[1] Non-NCMS" 2 "[2] NCMS without Reimbursement" 3 "[3] NCMS with Reimbursement"
			label value NCMS_REIM NCMS_REIM
			label var NCMS_REIM "NCMS Reimbursement in Village Clinics"
			tab NCMS_REIM, m
 
		* Household Asset
		d ASSET_INX2
		sum ASSET_INX2                 // 17 missing


	* Health Care Delivery System at the Village level

		* br if VD_DOCT==.|VD_DOCT==0  

		gen VGE_NOVC=(VD_DOCT==.|VD_DOCT==0)         // Not having village clinics or doctors (664, 6.3%)		
		label var VGE_NOVC "=1 if the village has no clinics or doctors"
		tab VGE_NOVC

		recode VGE_NOVC 0=1 1=0, gen(VGE_VC)
		label var VGE_VC "=1 if the village has clinics"
		tab VGE_VC

		* VC equipment index
		d VC_EQUIM2
		sum VC_EQUIM2
		replace VC_EQUIM2=0 if VGE_NOVC==1           // Impute zeros for villages without VD

		* drug
		sum VC_DRUGM
		replace VC_DRUGM=0 if VGE_NOVC==1            // Impute zeros for villages without VD		

			gen VC_DRUGMln=ln(VC_DRUGM+1)
			label var VC_DRUGMln "Log of drugs in village (Maximum)"
			sum VC_DRUGMln

		* VD/POP
		d VD_DOCT
		tab VD_DOCT
		gen VD_POP=VD_DOCT/VGE_POP*1000
		label var VD_POP "No of village clinicians per 1000 population"
		sum VD_POP
		replace VD_POP=0 if VGE_NOVC==1            // Impute zeros for villages without VD

		* whether or not the village has licensed doctors or licensed assistant doctors
		gen VGE_LICDOC=(VD_PRACT>0)|(VD_ASSIST>0)      
		label var VGE_LICDOC "=1 if having licensed doctors or licensed assistant doctors (0/1)"		
		tab VGE_LICDOC, m

		* practicing year in this village
		sum VD_MEDY
		replace VD_MEDY=0 if VGE_NOVC==1            // Impute zeros for villages without VD

		* Distance to the nearest THC (Reported by Household)
		d VGE_THCDis1
		sum VGE_THCDis1                            // 17 missing

		* Distance to the nearest county hospital (Reported by Household)
		d VGE_CHDis1
		sum VGE_CHDis1

	* generate regression sample

		global Y "VCUSER"

		global N "LDISEASE HEALTH"                                              // need variables
		global P "AGE_CAT GENDER_MALE EDU_CAT"                                  // predisposing variables
		global E "NCMS_REIM ASSET_INX2 "                                        // enabling variable

		global V "VC_EQUIM2 VC_DRUGMln VD_POP VGE_LICDOC VD_MEDY VGE_THCDis1 VGE_CHDis1"   // village clinic variables

		d $Y $N $P $E $V
		reg $Y $N $P $E $V
		gen s=1 if e(sample)==1                                     
		drop if s==.                                                      // drop 242 missing (2.40% sample size)
		drop s

		save "$output/Decomposition_mainregs.dta", replace 

		* Sample Size 9862


***************************************************************************
*             Changes in Facility Choice 2011-2018 (Figure 1)     
***************************************************************************

	use "$output/Decomposition_mainregs.dta", clear

	table1, by(YEAR) vars(CHOICE cat) cformat(%12.0f)

	tab CHOICE, gen(CHOICE_)

	collapse (mean) B1=CHOICE_1 B2=CHOICE_2 B3=CHOICE_3 B4=CHOICE_4 B5=CHOICE_5 ///
		(semean) SE1=CHOICE_1 SE2=CHOICE_2 SE3=CHOICE_3 SE4=CHOICE_4 SE5=CHOICE_5, by(YEAR)

	reshape long B SE, i(YEAR) j(GROUP)
	label define GROUP 1 "Self-Treatment" 2 "VC" 3 "THC" 4 "CH" 5 "Higher"
	label val GROUP GROUP

	sort GROUP YEAR
	gen x = _n
	replace x = _n + 2 if _n >= 4
	replace x = _n + 4 if _n >= 7
	replace x = _n + 6 if _n >= 10
	replace x = _n + 8 if _n >= 13

	label  define x      1  "2011"  2 "2015"  3 "2018"  ///
	                     6  "2011"  7 "2015"  8 "2018"  ///
			    		11  "2011" 12 "2015" 13 "2018"  ///
			    		16  "2011" 17 "2015" 18 "2018"	///
			    		21  "2011" 22 "2015" 23 "2018"

	label  value x x

	gen Bu = B + 1.96*SE
	gen Bl = B - 1.96*SE

	separate B, by(YEAR)
	separate Bu, by(YEAR)
	separate Bl, by(YEAR)

	twoway (scatter B2011 x , msymbol(S) msize(medium)) ///
	       (rcap Bu2011 Bl2011 x)  ///
	       (scatter B2015 x , msymbol(Oh) msize(large)) ///
	       (rcap Bu2015 Bl2015 x )   ///
	       (scatter B2018 x , msymbol(T) msize(medium)) ///
	       (rcap Bu2018 Bl2018 x ) ///
	       (line B x if x<=3) ///
	       (line B x if x>=6 & x<=8) ///
	       (line B x if x>=11 & x<=13) ///
	       (line B x if x>=16 & x<=18) ///
	       (line B x if x>=21 & x<=23),  ///
		   xlabel(none) xtitle(" ") b2("Facility Choice") ///
		   ytitle(Percentage) ///
		   text(-0.03 2 "Self-Treat", place(c)) ///
		   text(-0.03 7 "VC", place(c)) ///
		   text(-0.03 12 "THC", place(c)) ///
		   text(-0.03 17 "CH", place(c)) ///
		   text(-0.03 22 "HIGHER", place(c)) ///
		   legend(order(1 "2011" 3 "2015" 5 "2018") position(1) row(1) ring(0)) ///
		   name(figure1, replace)

	sort YEAR GROUP

	export excel using "$output/figure1_data.xls", firstrow(variables) replace


*********************************************************************
*                Summary Characteristics (Table 2)     
*********************************************************************


	use "$output/Decomposition_mainregs.dta", clear

	table1 if LDISEASE==1, by(YEAR) vars(CHOICE cat) cformat(%12.0f)
	table1 if LDISEASE==2, by(YEAR) vars(CHOICE cat) cformat(%12.0f)

	* Summary table (Part I: Based on all the patients)
	table1, by(YEAR) vars(VCUSER bin \ LDISEASE cat \ HEALTH cat \ ///
		AGE_CAT cat \ GENDER_MALE bine \ EDU_CAT cat \ NCMS_REIM cat \ ASSET_INX2 contn %2.1f \ ///
		VGE_VC bin \ VGE_THCDis1 contn %2.1f \ VGE_CHDis1 contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_1.xls",replace)

		* Summary table clustering by Village
		foreach v in LDISEASE HEALTH AGE_CAT EDU_CAT NCMS_REIM {
			tab `v', gen(`v'_) 
			}

		global N2 "LDISEASE_1 LDISEASE_2 LDISEASE_3 LDISEASE_4 LDISEASE_5 LDISEASE_6 LDISEASE_7 LDISEASE_8 LDISEASE_9 HEALTH_1 HEALTH_2 HEALTH_3 HEALTH_4 HEALTH_5"   // need variables
		global P2 "AGE_CAT_1 AGE_CAT_2 AGE_CAT_3 AGE_CAT_4 GENDER_MALE EDU_CAT_1 EDU_CAT_2 EDU_CAT_3 EDU_CAT_4 EDU_CAT_5 EDU_CAT_6"       // predisposing variables
		global E2 "NCMS_REIM_1 NCMS_REIM_2 NCMS_REIM_3 ASSET_INX2"                                                                   // enabling variable
		global V2 "VGE_VC VGE_THCDis1 VGE_CHDis1"                        // village clinic variables

		d  $N2 $P2 $E2 $V2
		reg VCUSER i.YEAR, cluster(VID)
			outreg2 using "$output/Summary_2.xls", replace ctitle(VCUSER) dec(3) nocons adds(F-test, e(p))
		
			foreach v in $N2 $P2 $E2 $V2 {
				reg `v' i.YEAR, cluster(VID)
				outreg2 using "$output/Summary_2.xls", append ctitle(`v') dec(3) nocons adds(F-test, e(p))
				}


	* Summary table (Part II: Village clinic characteristics based on patients with access to village clinics)
	table1 if VGE_VC==1, by(YEAR) vars(VC_EQUIM2 contn %2.1f \ VC_DRUGM contn %2.1f \ VD_POP contn %2.1f \ ///
		VGE_LICDOC bin \ VD_MEDY contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_3.xls",replace)


		global V3 "VC_EQUIM2 VC_DRUGM VD_POP VGE_LICDOC VD_MEDY"               // village clinic variables

		d  $V3
		reg VCUSER i.YEAR if VGE_VC==1, cluster(VID)
			outreg2 using "$output/Summary_4.xls", replace ctitle(VCUSER) dec(3) nocons adds(F-test, e(p))
		
			foreach v in $V3 {
				reg `v' i.YEAR if VGE_VC==1, cluster(VID)
				outreg2 using "$output/Summary_4.xls", append ctitle(`v') dec(3) nocons adds(F-test, e(p))
				}


**********************************************************
*               Village Characteristics (Table A4)       
**********************************************************

	use "$datadir/Decomposition_village.dta", clear

	* convert per capita income to 2011 values
	gen CPI=.
	replace CPI=114.922 if YEAR==2015
	replace CPI=105.554 if YEAR==2011
	replace CPI=121.559 if YEAR==2018
	// source: https://data.worldbank.org/indicator/FP.CPI.TOTL?end=2018&locations=CN&start=1986

	gen VGE_PCINC_CPI=VGE_PCINC*105.554/CPI
	label var VGE_PCINC_CPI "per capita income in 2011 value"

	* Number of Village Clinics
	tab VC_COUNT
	replace VC_COUNT=0 if VC_COUNT==.

	* Summary Table
	table1, by(YEAR) vars(VGE_POP contn %2.1f \ VGE_PCINC_CPI contn %2.1f \ VC_COUNT cat \ NCMS_VC bine) cformat(%12.0f) ///
		saving("$output/Village_1.xls",replace)


*********************************************************
*            Village Clinics and Doctors (Table A4)     
*********************************************************

	use "$datadir/Decomposition_clinic", clear

	table1, by(YEAR) vars(VCEQUIIND2 contn %2.1f \ VDRUG contn %2.1f) ///
		saving("$output/Clinic_1.xls",replace)


	use "$datadir/Decomposition_doctor", clear

	drop if VDIAGNOSI==0

	table1, by(YEAR) vars(VDAGE contn %2.1f \ VDMEDY contn %2.1f \ VDMALE bine \ VDEDU cat \ VDCERTIF cat) cformat(%12.0f) ///
		saving("$output/Doctor_1.xls",replace)


**************************************************************
*           Attrition & Summary Table (Table A1)    
**************************************************************

use "$datadir/Decomposition_household", clear         

	table1, by(YEAR) percent vars(AGE contn %2.1f \ GENDER cat \ EDU_CAT cat \ ///
		HEALTH cat \ ASSET_INX2 contn %2.1f \ SICK bin) cformat(%12.0f) ///
		saving("$output/attrition_all.xls",replace)

	table1 if YEAR==2015, by(stay2) percent vars(AGE contn %2.1f \ GENDER cat \ EDU_CAT cat \ ///
		HEALTH cat \ ASSET_INX2 contn %2.1f \ SICK bin) cformat(%12.0f) ///
		saving("$output/attrition_2015.xls",replace)

	table1 if YEAR==2018, by(stay3) percent vars(AGE contn %2.1f \ GENDER cat \ EDU_CAT cat \ ///
		HEALTH cat \ ASSET_INX2 contn %2.1f \ SICK bin) cformat(%12.0f) ///
		saving("$output/attrition_2018.xls",replace)



*************************************************************
*                   Main Regressions      
*************************************************************

use "$output/Decomposition_mainregs.dta", clear

	global A "i.LDISEASE i.HEALTH"                                      // need variables
	global B "i.AGE_CAT GENDER_MALE i.EDU_CAT"                          // predisposing variables
	global C "i.NCMS_REIM ASSET_INX2"                                   // enabling variable

	global D "VGE_VC VGE_THCDis1 VGE_CHDis1"                            // access variables
	global E "VC_EQUIM2 VC_DRUGMln VD_POP VGE_LICDOC VD_MEDY"           // village clinic variables

	gen CID=substr(VID,1,2)
	destring CID, replace

	* OLS Regression (Unconditional) by Year

	eststo clear

		eststo, title(2011):  reg VCUSER $A $B $C $D $E i.CID if YEAR==2011, vce(cluster VID) 
		eststo, title(2015):  reg VCUSER $A $B $C $D $E i.CID if YEAR==2015, vce(cluster VID) 
		eststo, title(2018):  reg VCUSER $A $B $C $D $E i.CID if YEAR==2018, vce(cluster VID) 

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Regression_ols.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("Main regression by OLS")

	* Logit Regressions by Year

	eststo clear

		logit VCUSER $A $B $C $D $E i.CID if YEAR==2011, vce(cluster VID)
		eststo, title(2011):  margins, dydx(*) post

		logit VCUSER $A $B $C $D $E i.CID if YEAR==2015, vce(cluster VID)
		eststo, title(2015):  margins, dydx(*) post

		logit VCUSER $A $B $C $D $E i.CID if YEAR==2018, vce(cluster VID)
		eststo, title(2018):  margins, dydx(*) post

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Regression_logit.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("Main regression by Logit")


*************************************************************
*                      Main Decomposition      
*************************************************************

* Linear OB Decomposition

use "$output/Decomposition_mainregs.dta", clear

	gen CID=substr(VID,1,2)
	destring CID, replace

	* generate year dummies
	tab YEAR

	gen YEAR11_18=.
	replace YEAR11_18=0 if YEAR==2018
	replace YEAR11_18=1 if YEAR==2011
	label define YEAR11_18 0 "2018" 1 "2011"
	label val YEAR11_18 YEAR11_18
	tab YEAR11_18

	gen YEAR11_15=.
	replace YEAR11_15=0 if YEAR==2015
	replace YEAR11_15=1 if YEAR==2011
	label define YEAR11_15 0 "2015" 1 "2011"
	label val YEAR11_15 YEAR11_15
	tab YEAR11_15

	gen YEAR15_18=.
	replace YEAR15_18=0 if YEAR==2018
	replace YEAR15_18=1 if YEAR==2015
	label define YEAR15_18 0 "2018" 1 "2015"
	label val YEAR15_18 YEAR15_18
	tab YEAR15_18

	* generate dummy variables
	foreach v in LDISEASE AGE_CAT HEALTH EDU_CAT NCMS_REIM CID {
		tab `v', gen(`v'_) 
		}

	* Variable Lists
	global A2 "normalize(LDISEASE_1-LDISEASE_9)"
	global B2 "normalize(HEALTH_1-HEALTH_5)"
	global C2 "normalize(AGE_CAT_1-AGE_CAT_4)" 
	global D2 "GENDER_MALE" 
	global E2 "normalize(EDU_CAT_1-EDU_CAT_6)" 
	global F2 "normalize(NCMS_REIM_1-NCMS_REIM_3)"   
	global G2 "ASSET_INX2"
	global H2 "VGE_VC"
	global I2 "VGE_THCDis1 VGE_CHDis1" 	
	global J2 "VC_EQUIM2"
	global K2 "VC_DRUGMln"
	global L2 "VD_POP"
	global M2 "VGE_LICDOC"
	global N2 "VD_MEDY"
	global P2 "normalize(CID_1-CID_25)" 


	* Standard OB Decomposition
	eststo clear

		eststo: oaxaca VCUSER (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2), by(YEAR11_18) pooled detail relax vce(cluster VID)	

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Decomposition_OB.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("OB Decomposition")


	* Standard OB Decomposition by Intermediate Periods 
	eststo clear

		eststo: oaxaca VCUSER (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2), by(YEAR11_15) pooled detail relax vce(cluster VID)	

		eststo: oaxaca VCUSER (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2), by(YEAR15_18) pooled detail relax vce(cluster VID)	

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Decomposition_OB_intermediate.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("OB Decomposition Bootstrap")


	* Fairlie Non-linear Decomposition

	global A3 "LDISEASE_2-LDISEASE_9"
	global B3 "HEALTH_2-HEALTH_5"
	global C3 "AGE_CAT_2-AGE_CAT_4" 
	global D3 "GENDER_MALE" 
	global E3 "EDU_CAT_2-EDU_CAT_6" 
	global F3 "NCMS_REIM_2-NCMS_REIM_3"   
	global G3 "ASSET_INX2"
	global H3 "VGE_VC"
	global I3 "VGE_THCDis1 VGE_CHDis1" 
	global J3 "VC_EQUIM2"
	global K3 "VC_DRUGMln"
	global L3 "VD_POP"
	global M3 "VGE_LICDOC"
	global N3 "VD_MEDY"
	global P3 "CID_2-CID_25" 

	set seed 2020
	
	* ro: randomly ordering of variables in the detailed decomposition
	* pooled: use a pooled model as reference
	* The default is to use a logit model
	* reps: number of decomposition replications
	
	eststo clear

		eststo: fairlie VCUSER (Disease: $A3) (Health: $B3) (Aging: $C3) (Gender: $D3) (Education: $E3) ///
			 (Insurance: $F3) (Household: $G3) (VC: $H3) (Distance: $I3) (Equipment: $J3) (Drug: $K3) ///
			 (Doctor: $L3) (Qualification: $M3) (PracticingY: $N3) (CountyFE: $P3), by(YEAR11_18) pooled(YEAR11_18) ro reps(2000)	

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Decomposition_Fairlie.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("Fairlie Non-linear Decomposition")


********************************************************************
*             Two-steps Analysis      
********************************************************************

use "$output/Decomposition_mainregs.dta", clear

	global A "i.LDISEASE i.HEALTH"                                      // need variables
	global B "i.AGE_CAT GENDER_MALE i.EDU_CAT"                          // predisposing variables
	global C "i.NCMS_REIM ASSET_INX2"                                   // enabling variable

	global D "VGE_VC VGE_THCDis1 VGE_CHDis1"                            // access variables
	global E "VC_EQUIM2 VC_DRUGMln VD_POP VGE_LICDOC VD_MEDY"           // village clinic variables

	gen CID=substr(VID,1,2)
	destring CID, replace

	* OLS Regression (Two steps)

	eststo clear

		eststo, title(2011):  reg DOCVIST $A $B $C $D $E i.CID if YEAR==2011, vce(cluster VID) 
		eststo, title(2015):  reg DOCVIST $A $B $C $D $E i.CID if YEAR==2015, vce(cluster VID) 
		eststo, title(2018):  reg DOCVIST $A $B $C $D $E i.CID if YEAR==2018, vce(cluster VID) 

		eststo, title(2011):  reg VCUSER_CON $A $B $C $D $E i.CID if YEAR==2011, vce(cluster VID) 
		eststo, title(2015):  reg VCUSER_CON $A $B $C $D $E i.CID if YEAR==2015, vce(cluster VID) 
		eststo, title(2018):  reg VCUSER_CON $A $B $C $D $E i.CID if YEAR==2018, vce(cluster VID) 

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Regression_ols_2step.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("Main regression by OLS")	


	* generate year dummies
	tab YEAR
	gen YEAR11_18=.
	replace YEAR11_18=0 if YEAR==2018
	replace YEAR11_18=1 if YEAR==2011
	label define YEAR11_18 0 "2018" 1 "2011"
	label val YEAR11_18 YEAR11_18
	tab YEAR11_18

	* generate dummy variables
	foreach v in LDISEASE AGE_CAT HEALTH EDU_CAT NCMS_REIM CID {
		tab `v', gen(`v'_) 
		}

	* Variable Lists
	global A2 "normalize(LDISEASE_1-LDISEASE_9)"
	global B2 "normalize(HEALTH_1-HEALTH_5)"
	global C2 "normalize(AGE_CAT_1-AGE_CAT_4)" 
	global D2 "GENDER_MALE" 
	global E2 "normalize(EDU_CAT_1-EDU_CAT_6)" 
	global F2 "normalize(NCMS_REIM_1-NCMS_REIM_3)"   
	global G2 "ASSET_INX2"
	global H2 "VGE_VC"
	global I2 "VGE_THCDis1 VGE_CHDis1" 	
	global J2 "VC_EQUIM2"
	global K2 "VC_DRUGMln"
	global L2 "VD_POP"
	global M2 "VGE_LICDOC"
	global N2 "VD_MEDY"
	global P2 "normalize(CID_1-CID_25)" 


	* Standard OB Decomposition (Two steps)
	eststo clear

		eststo: oaxaca DOCVIST (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2), by(YEAR11_18) pooled detail relax vce(cluster VID)	

		eststo: oaxaca VCUSER_CON (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2), by(YEAR11_18) pooled detail relax vce(cluster VID)	

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Decomposition_OB_2step.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("OB Decomposition")


********************************************************************
*            Analysis by Provinces (Added for SSM R&R)     
********************************************************************

use "$output/Decomposition_mainregs.dta", clear

	gen PVID=substr(VID,1,1)
	destring PVID, replace
	tab PVID

	gen CID=substr(VID,1,2)
	destring CID, replace

	* Summary table by Provinces 

	* Part I: Based on all the patients
	table1 if PVID==1, by(YEAR) vars(VCUSER bin \ LDISEASE cat \ HEALTH cat \ ///
		AGE_CAT cat \ GENDER_MALE bine \ EDU_CAT cat \ NCMS_REIM cat \ ASSET_INX2 contn %2.1f \ ///
		VGE_VC bin \ VGE_THCDis1 contn %2.1f \ VGE_CHDis1 contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID1_1.xls",replace)

	table1 if PVID==3, by(YEAR) vars(VCUSER bin \ LDISEASE cat \ HEALTH cat \ ///
		AGE_CAT cat \ GENDER_MALE bine \ EDU_CAT cat \ NCMS_REIM cat \ ASSET_INX2 contn %2.1f \ ///
		VGE_VC bin \ VGE_THCDis1 contn %2.1f \ VGE_CHDis1 contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID3_1.xls",replace)

	table1 if PVID==4, by(YEAR) vars(VCUSER bin \ LDISEASE cat \ HEALTH cat \ ///
		AGE_CAT cat \ GENDER_MALE bine \ EDU_CAT cat \ NCMS_REIM cat \ ASSET_INX2 contn %2.1f \ ///
		VGE_VC bin \ VGE_THCDis1 contn %2.1f \ VGE_CHDis1 contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID4_1.xls",replace)

	table1 if PVID==5, by(YEAR) vars(VCUSER bin \ LDISEASE cat \ HEALTH cat \ ///
		AGE_CAT cat \ GENDER_MALE bine \ EDU_CAT cat \ NCMS_REIM cat \ ASSET_INX2 contn %2.1f \ ///
		VGE_VC bin \ VGE_THCDis1 contn %2.1f \ VGE_CHDis1 contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID5_1.xls",replace)

	table1 if PVID==6, by(YEAR) vars(VCUSER bin \ LDISEASE cat \ HEALTH cat \ ///
		AGE_CAT cat \ GENDER_MALE bine \ EDU_CAT cat \ NCMS_REIM cat \ ASSET_INX2 contn %2.1f \ ///
		VGE_VC bin \ VGE_THCDis1 contn %2.1f \ VGE_CHDis1 contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID6_1.xls",replace)


	* Part II: Village clinic characteristics based on patients with access to village clinics
	table1 if VGE_VC==1&PVID==1, by(YEAR) vars(VC_EQUIM2 contn %2.1f \ VC_DRUGM contn %2.1f \ VD_POP contn %2.1f \ ///
		VGE_LICDOC bin \ VD_MEDY contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID1_2.xls",replace)

	table1 if VGE_VC==1&PVID==3, by(YEAR) vars(VC_EQUIM2 contn %2.1f \ VC_DRUGM contn %2.1f \ VD_POP contn %2.1f \ ///
		VGE_LICDOC bin \ VD_MEDY contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID3_2.xls",replace)

	table1 if VGE_VC==1&PVID==4, by(YEAR) vars(VC_EQUIM2 contn %2.1f \ VC_DRUGM contn %2.1f \ VD_POP contn %2.1f \ ///
		VGE_LICDOC bin \ VD_MEDY contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID4_2.xls",replace)

	table1 if VGE_VC==1&PVID==5, by(YEAR) vars(VC_EQUIM2 contn %2.1f \ VC_DRUGM contn %2.1f \ VD_POP contn %2.1f \ ///
		VGE_LICDOC bin \ VD_MEDY contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID5_2.xls",replace)

	table1 if VGE_VC==1&PVID==6, by(YEAR) vars(VC_EQUIM2 contn %2.1f \ VC_DRUGM contn %2.1f \ VD_POP contn %2.1f \ ///
		VGE_LICDOC bin \ VD_MEDY contn %2.1f) cformat(%12.0f) /// 
		saving("$output/Summary_PVID6_2.xls",replace)


	* Regression by Provinces

	global A "i.LDISEASE i.HEALTH"                                      // need variables
	global B "i.AGE_CAT GENDER_MALE i.EDU_CAT"                          // predisposing variables
	global C "i.NCMS_REIM ASSET_INX2"                                   // enabling variable

	global D "VGE_VC VGE_THCDis1 VGE_CHDis1"                            // access variables
	global E "VC_EQUIM2 VC_DRUGMln VD_POP VGE_LICDOC VD_MEDY"           // village clinic variables


	eststo clear

		eststo, title(2011):  reg VCUSER $A $B $C $D $E i.CID if PVID==1&YEAR==2011, vce(cluster VID) 
		eststo, title(2015):  reg VCUSER $A $B $C $D $E i.CID if PVID==1&YEAR==2015, vce(cluster VID) 
		eststo, title(2018):  reg VCUSER $A $B $C $D $E i.CID if PVID==1&YEAR==2018, vce(cluster VID) 

		eststo, title(2011):  reg VCUSER $A $B $C $D $E i.CID if PVID==3&YEAR==2011, vce(cluster VID) 
		eststo, title(2015):  reg VCUSER $A $B $C $D $E i.CID if PVID==3&YEAR==2015, vce(cluster VID) 
		eststo, title(2018):  reg VCUSER $A $B $C $D $E i.CID if PVID==3&YEAR==2018, vce(cluster VID) 

		eststo, title(2011):  reg VCUSER $A $B $C $D $E i.CID if PVID==4&YEAR==2011, vce(cluster VID) 
		eststo, title(2015):  reg VCUSER $A $B $C $D $E i.CID if PVID==4&YEAR==2015, vce(cluster VID) 
		eststo, title(2018):  reg VCUSER $A $B $C $D $E i.CID if PVID==4&YEAR==2018, vce(cluster VID) 

		eststo, title(2011):  reg VCUSER $A $B $C $D $E i.CID if PVID==5&YEAR==2011, vce(cluster VID) 
		eststo, title(2015):  reg VCUSER $A $B $C $D $E i.CID if PVID==5&YEAR==2015, vce(cluster VID) 
		eststo, title(2018):  reg VCUSER $A $B $C $D $E i.CID if PVID==5&YEAR==2018, vce(cluster VID) 

		eststo, title(2011):  reg VCUSER $A $B $C $D $E i.CID if PVID==6&YEAR==2011, vce(cluster VID) 
		eststo, title(2015):  reg VCUSER $A $B $C $D $E i.CID if PVID==6&YEAR==2015, vce(cluster VID) 
		eststo, title(2018):  reg VCUSER $A $B $C $D $E i.CID if PVID==6&YEAR==2018, vce(cluster VID) 

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Regression_ols_PVID.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("Main regression by OLS")


	* Decomposition

	tab YEAR
	gen YEAR11_18=.
	replace YEAR11_18=0 if YEAR==2018
	replace YEAR11_18=1 if YEAR==2011
	label define YEAR11_18 0 "2018" 1 "2011"
	label val YEAR11_18 YEAR11_18
	tab YEAR11_18

	foreach v in LDISEASE AGE_CAT HEALTH EDU_CAT NCMS_REIM CID {
		tab `v', gen(`v'_) 
		}

	global A2 "normalize(LDISEASE_1-LDISEASE_9)"
	global B2 "normalize(HEALTH_1-HEALTH_5)"
	global C2 "normalize(AGE_CAT_1-AGE_CAT_4)" 
	global D2 "GENDER_MALE" 
	global E2 "normalize(EDU_CAT_1-EDU_CAT_6)" 
	global F2 "normalize(NCMS_REIM_1-NCMS_REIM_3)"   
	global G2 "ASSET_INX2"
	global H2 "VGE_VC"
	global I2 "VGE_THCDis1 VGE_CHDis1" 	
	global J2 "VC_EQUIM2"
	global K2 "VC_DRUGMln"
	global L2 "VD_POP"
	global M2 "VGE_LICDOC"
	global N2 "VD_MEDY"
	global P2 "normalize(CID_1-CID_25)" 


	eststo clear

		eststo: oaxaca VCUSER (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2) if PVID==1, by(YEAR11_18) pooled detail relax vce(cluster VID)	

		eststo: oaxaca VCUSER (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2) if PVID==3, by(YEAR11_18) pooled detail relax vce(cluster VID)	

		eststo: oaxaca VCUSER (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2) if PVID==4, by(YEAR11_18) pooled detail relax vce(cluster VID)	

		eststo: oaxaca VCUSER (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2) if PVID==5, by(YEAR11_18) pooled detail relax vce(cluster VID)	

		eststo: oaxaca VCUSER (Disease: $A2) (Health: $B2) (Aging: $C2) (Gender: $D2) (Education: $E2) ///
			 (Insurance: $F2) (Household: $G2) (VC: $H2) (Distance: $I2) (Equipment: $J2) (Drug: $K2) ///
			 (Doctor: $L2) (Qualification: $M2) (PracticingY: $N2) (CountyFE: $P2) if PVID==6, by(YEAR11_18) pooled detail relax vce(cluster VID)	

		esttab, label b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) replace nogaps

		esttab using "$output/Decomposition_OB_PVID.csv", b(%9.3fc) se(%9.3fc) ///
			starlevels( * 0.1 ** 0.05 *** 0.01) ar2(2) label replace nobase nogap mti compress  ///
			title("OB Decomposition")

exit

